# define objects to save model estimates for plotting
diffs <- NA
diffs.se <- NA

# loop through all the different model specifications for this treaty variable
for(j in 1:8){
  for(k in 1:2){
      # load both human rights latent variables from Fariss 2014
if(k==1)data <- read.csv("CYLatentRepressionDynamicStandardDynamicX02.csv")
if(k==2)data <- read.csv("CYLatentRepressionConstantStandardDynamicX02.csv")

# load binary treaty variables
treaty <- read.csv("hr_treaties.csv")
treaty <- subset(treaty, select=c(ccode, year, cat_rn, cescr_rn, cerd_rn, ccpr_rn, cedaw_rn, crc_rn))
names(treaty) <- c("COW", "YEAR", "cat", "cescr", "cerd", "ccpr", "cedaw", "crc")

# this is not correct but necessary for some replications
WRONG <- TRUE
if(WRONG==TRUE)treaty$cat[is.na(treaty$cat)] <- 0
if(WRONG==TRUE)treaty$cescr[is.na(treaty$cescr)] <- 0
if(WRONG==TRUE)treaty$cerd[is.na(treaty$cerd)] <- 0
if(WRONG==TRUE)treaty$ccpr[is.na(treaty$ccpr)] <- 0
if(WRONG==TRUE)treaty$cedaw[is.na(treaty$cedaw)] <- 0
if(WRONG==TRUE)treaty$crc[is.na(treaty$crc)] <- 0

# load polity data
polity <- read.csv("p4v2010.csv", na.strings = c("-66", "-77", "-88", "-99"))
polity2 <- subset(polity, select=c(ccode, year, polity2))

# load updated Gleditsch trade gdp and population data
new.gdp <- read.delim("gdpv6.txt")
new.gdp <- subset(new.gdp, select=c(statenum, year, rgdppc, pop))
names(new.gdp) <- c("ccode", "year", "rgdpch", "pop")


# merge the data together and make lags
data <- merge(data, treaty, by.x=c("COW", "YEAR"), by.y=c("COW", "YEAR"), all.x=TRUE, all.y=FALSE, suffixes = c("",".treaty"))
nrow(data)

data <- merge(data, polity2, by.x=c("COW", "YEAR"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data)

data <- merge(data, new.gdp, by.x=c("COW", "YEAR"), by.y=c("ccode", "year"), all.x=TRUE, all.y=FALSE)
nrow(data)


data.lag <- data
data.lag$YEAR <- data.lag$YEAR + 1
data <- merge(data, data.lag, by.x=c("COW", "YEAR"), by.y=c("COW", "YEAR"), all.x=TRUE, all.y=FALSE, suffixes = c("",".lag"))

data <- subset(data, !is.na(latentmean.lag) & !is.na(cat.lag) & !is.na(polity2.lag) & !is.na(rgdpch.lag) & !is.na(pop.lag) & YEAR>=1965 & YEAR<=2010)
nrow(data)

newdata <- list()

# take n draws from the posterior distribution and make n new datasets in the list object just created
for(i in 1:1000){
  newdata[[i]] <- data
  newdata[[i]]$draw.lag <- rnorm(cbind(rep(1,nrow(data))), mean=data$latentmean.lag, sd=data$latentsd.lag)
  #newdata[[i]]$draw <- rnorm(cbind(rep(1,nrow(data))), mean=data$latentmean, sd=data$latentsd)
}


# define model formula for each specification
if(j==1)FML <- "latentmean ~ draw.lag + cerd.lag"
if(j==2)FML <- "latentmean ~ draw.lag + cerd.lag + polity2.lag"
if(j==3)FML <- "latentmean ~ draw.lag + cerd.lag + polity2.lag + log(rgdpch.lag)"
if(j==4)FML <- "latentmean ~ draw.lag + cerd.lag + polity2.lag + log(rgdpch.lag) + log(pop.lag)"
if(j==5)FML <- "latentmean ~ draw.lag + cerd.lag + log(rgdpch.lag) + log(pop.lag)"
if(j==6)FML <- "latentmean ~ draw.lag + cerd.lag + log(rgdpch.lag)"
if(j==7)FML <- "latentmean ~ draw.lag + cerd.lag + log(pop.lag)"
if(j==8)FML <- "latentmean ~ draw.lag + cerd.lag + polity2.lag + log(pop.lag)"

# define function to combine multe model estimates (this incorporates uncertatiny from the latent variables, see the supplementary appendix section F for more details about this function)
milm <- function(fml, midata){
 xx <- terms(as.formula(fml))
 lms <- matrix(data=NA, nrow=(length(attr(xx, "term.labels")) + 1), ncol=length(midata))
 ses <- matrix(data=NA, nrow=(length(attr(xx, "term.labels")) + 1), ncol=length(midata))
 vcovs <- list()
  for(i in 1:length(midata)){
    tmp <- lm(formula=as.formula(fml), data=midata[[i]])
    lms[,i] <- tmp$coefficients
    ses[,i] <- sqrt(diag(vcov(tmp)))
    vcovs[[i]] <- vcov(tmp)
  }
 par.est <- apply(lms, 1, mean)
 se.within <- apply(ses, 1, mean)
 se.between <- apply(lms, 1, var)
 se.est <- sqrt(se.within^2 + se.between*(1 + (1/length(midata))))
 list("terms"=names(tmp$coefficients), "beta" = par.est, "SE"=se.est, "vcovs"=vcovs,"coefs" = lms)    
}

# call the milm function with the list of datasets newdata
model <- milm(fml=FML, midata=newdata)

# create object for printing during the loop and then print the results to screen
temp <- data.frame(model$terms, model$beta, model$SE, model$beta/model$SE)

print(paste("Model Output"))
print(temp)

print(as.numeric(temp[temp$model.terms=="cerd.lag",2]))
print(as.numeric(temp[temp$model.terms=="cerd.lag",3]))



par(mar=c(5,7,5,5))

if(k==1)Dynamic.est <- as.numeric(temp[temp$model.terms=="cerd.lag",2])   
if(k==1)Dynamic.se <- as.numeric(temp[temp$model.terms=="cerd.lag",3])

if(k==2)Constant.est <- as.numeric(temp[temp$model.terms=="cerd.lag",2])
if(k==2)Constant.se <-  as.numeric(temp[temp$model.terms=="cerd.lag",3])

# barplot of the difference between treaty coefficient for models using the two different latent human rights variables from Fariss (2014)
if(k==2){
barplot(c(Constant.est, Dynamic.est), ylim=c(-0.08, 0.08), xlim=c(0,3), yaxt="n")
lines(c(.7,.7),c(Constant.est+1.96*Constant.se, Constant.est-1.96*Constant.se))
lines(c(1.9,1.9),c(Dynamic.est+1.96*Dynamic.se, Dynamic.est-1.96*Dynamic.se))
lines(c(.7,.7),c(Constant.est+1*Constant.se, Constant.est-1*Constant.se), lwd=3)
lines(c(1.9,1.9),c(Dynamic.est+1*Dynamic.se, Dynamic.est-1*Dynamic.se), lwd=3)
lines(c(2.9,2.9), c(Dynamic.est, Constant.est), lwd=2)
lines(c(2.75,2.9), c(Constant.est, Constant.est), lwd=2)
lines(c(2.75,2.9), c(Dynamic.est, Dynamic.est), lwd=2)
abline(h=0,col=grey(.75),lwd=1.5, lty=2)
lines(c(2.9,3.2), c(0,0), lwd=2)
mtext(side=2, "Linear Model Coefficients for \nConvention on the Elimination of \nAll Forms of Racial Discrimination", line=3.5, cex=1.7, at=0)
mtext(side=4, "Difference\nBetween Estimates", line=2.25, cex=1.7, at=0)
mtext(side=3, "Model Coefficient\n(Dynamic Standard DV)", line=1.25, cex=1.7, at=1.9)
mtext(side=1, "Model Coefficient\n(Constant Standard DV)", line=3.00, cex=1.7, at=.7)
mtext(side=1, "DV=Latent Physical Integrity", line=3.00, cex=1.0, at=3)
axis(side=2, at=c(-0.08,-0.06,-0.04,-0.02,0,0.02,0.04,0.06,0.08), las=2)


# Z-score for difference
print(paste("Z-score for difference"))
print(Dynamic.est-Constant.est)

Z <- (Dynamic.est-Constant.est)/sqrt(Dynamic.se^2 + Constant.se^2)
print(2*(1-pnorm(Z)))

diffs[j] <- Dynamic.est-Constant.est
diffs.se[j] <- sqrt(Dynamic.se^2 + Constant.se^2)
}

}
}

par(mar=c(5,9,5,1))

diffs[9] <- mean(diffs)
se.within <- mean(diffs.se)
se.between <- var(diffs)
diffs.se[9] <- sqrt(se.within^2 + se.between*(1 + (1/length(diffs))))

diffs <- rev(diffs)
diffs.se <- rev(diffs.se)

# barplot of the differences across all 8 model specifications
par(mar=c(5,9,5,1))
barplot(diffs, space=.5, xlim=c(-.0325,0.1025),horiz=T, xaxt="n", xaxt="n")
abline(v=0,col=1,lwd=1.5, lty=1)
abline(v=0,col=grey(.75),lwd=1.5, lty=2)
#axis(side=1, at=c(-0.03,-0.02,-0.01,0,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,.1), las=1)
axis(side=1, at=c(-0.02,0,0.02,0.04,0.06,0.08,.1), las=1)
mtext(side=3, "Difference Between Linear Model Coefficients \nfor the Convention on the Elimination \nof All Forms of Racial Discrimination", line=0, cex=1.7, at=0.025)

INDEX <- c(1,2.5,4,5.5,7,8.5,10,11.5,13)
axis(side=2, at=rev(INDEX), labels=c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5", "Model 6", "Model 7", "Model 8", "Model Average"), las=2)

for(i in 1:9){
    lines(c(diffs[i]+1.96*diffs.se[i], diffs[i]-1.96*diffs.se[i]), c(INDEX[i], INDEX[i]), lwd=1)
    lines(c(diffs[i]+1*diffs.se[i], diffs[i]-1*diffs.se[i]), c(INDEX[i], INDEX[i]), lwd=3)
}
box()
